{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "accc805b-b661-441c-b1f8-6dd0a933a762",
   "metadata": {},
   "source": [
    "# Lesson 7: Chatting with the SEC Knowledge Graph"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5c5869a-656e-4eae-8d12-594cbf5c970d",
   "metadata": {},
   "source": [
    "<p style=\"background-color:#fd4a6180; padding:15px; margin-left:20px\"> <b>Note:</b> This notebook takes about 30 seconds to be ready to use. Please wait until the \"Kernel starting, please wait...\" message clears from the top of the notebook before running any cells. You may start the video while you wait.</p>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2773ac88-78f8-4062-b468-a47e4d382652",
   "metadata": {},
   "source": [
    "### Import packages and set up Neo4j"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efbb9a0a-59f4-4bf9-8186-4af17ae951dc",
   "metadata": {
    "height": 301
   },
   "outputs": [],
   "source": [
    "from dotenv import load_dotenv\n",
    "import os\n",
    "\n",
    "import textwrap\n",
    "\n",
    "# Langchain\n",
    "from langchain_community.graphs import Neo4jGraph\n",
    "from langchain_community.vectorstores import Neo4jVector\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain.chains import RetrievalQAWithSourcesChain\n",
    "from langchain.prompts.prompt import PromptTemplate\n",
    "from langchain.chains import GraphCypherQAChain\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "# Warning control\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ff1dead-df1d-4e59-a3c1-42056a4ba091",
   "metadata": {
    "height": 335
   },
   "outputs": [],
   "source": [
    "# Load connection settings and API credentials from the local .env file.\n",
    "# override=True lets values in .env replace variables already set in the\n",
    "# process environment.\n",
    "load_dotenv('.env', override=True)\n",
    "NEO4J_URI = os.getenv('NEO4J_URI')\n",
    "NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')\n",
    "NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')\n",
    "# Fall back to the 'neo4j' database name when none is configured.\n",
    "NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'\n",
    "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n",
    "\n",
    "# Note: OPENAI_BASE_URL is specific to this course environment and is not a\n",
    "# standard part of Neo4j's integration with OpenAI. When it is unset (for\n",
    "# example in your own environment), fall back to the public OpenAI API base\n",
    "# URL instead of failing with a TypeError on `None + str`.\n",
    "OPENAI_ENDPOINT = (\n",
    "    os.getenv('OPENAI_BASE_URL') or 'https://api.openai.com/v1'\n",
    ").rstrip('/') + '/embeddings'\n",
    "\n",
    "# Global constants\n",
    "VECTOR_INDEX_NAME = 'form_10k_chunks'\n",
    "VECTOR_NODE_LABEL = 'Chunk'\n",
    "VECTOR_SOURCE_PROPERTY = 'text'\n",
    "VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cf9637c-76f9-4d52-894b-4f01233055d5",
   "metadata": {
    "height": 80
   },
   "outputs": [],
   "source": [
    "kg = Neo4jGraph(\n",
    "    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b783aaff-4042-4254-ad79-b87886dc6541",
   "metadata": {},
   "source": [
    "### Explore the updated SEC documents graph\n",
    "In this lesson, you'll be working with an updated graph that also includes the address information discussed in the video.\n",
    "- Some outputs below may differ slightly from the video\n",
    "- Start by checking the schema of the graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48dfcba0-9d4c-49c3-87b8-9b9fb4b96c67",
   "metadata": {
    "height": 46
   },
   "outputs": [],
   "source": [
    "kg.refresh_schema()\n",
    "print(textwrap.fill(kg.schema, 60))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7121c96c-a5dc-4d91-b45f-e5aa53a4b4d7",
   "metadata": {},
   "source": [
    "- Check the address of a random Manager\n",
    "- Note: the company returned by the following query may differ from the one in the video"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c50f182-f654-4684-b8e7-8cd6f1db1668",
   "metadata": {
    "height": 97
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "MATCH (mgr:Manager)-[:LOCATED_AT]->(addr:Address)\n",
    "RETURN mgr, addr\n",
    "LIMIT 1\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36877ab6-7d35-4da0-9934-a45b8f76a770",
   "metadata": {},
   "source": [
    "- Full-text search for a manager named Royal Bank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd26162c-10f8-4cfe-bc47-a63b1f169fd2",
   "metadata": {
    "height": 114
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  CALL db.index.fulltext.queryNodes(\n",
    "         \"fullTextManagerNames\", \n",
    "         \"royal bank\") YIELD node, score\n",
    "  RETURN node.managerName, score LIMIT 1\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "616a7e3a-fd38-4168-9591-102718687635",
   "metadata": {},
   "source": [
    "- Find location of Royal Bank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fea0438a-e0bd-4a5a-8e74-7b7a923718b5",
   "metadata": {
    "height": 165
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "CALL db.index.fulltext.queryNodes(\n",
    "         \"fullTextManagerNames\", \n",
    "         \"royal bank\"\n",
    "  ) YIELD node, score\n",
    "WITH node as mgr LIMIT 1\n",
    "MATCH (mgr:Manager)-[:LOCATED_AT]->(addr:Address)\n",
    "RETURN mgr.managerName, addr\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a2d0f56-0131-4e07-b2c1-e2ecb0075aa5",
   "metadata": {},
   "source": [
    "- Determine which state has the most investment firms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0840ff4-c451-4e40-a4fc-18642d452afb",
   "metadata": {
    "height": 131
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH p=(:Manager)-[:LOCATED_AT]->(address:Address)\n",
    "  RETURN address.state as state, count(address.state) as numManagers\n",
    "    ORDER BY numManagers DESC\n",
    "    LIMIT 10\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79c150e6-76bb-416f-aa39-393ac250ce5f",
   "metadata": {},
   "source": [
    "- Determine which state has the most companies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8696d17b-7f0d-4f67-b3e8-34c8dc04cb3e",
   "metadata": {
    "height": 114
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH p=(:Company)-[:LOCATED_AT]->(address:Address)\n",
    "  RETURN address.state as state, count(address.state) as numCompanies\n",
    "    ORDER BY numCompanies DESC\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f700c3f8-b0b1-42b2-a292-41b92784d027",
   "metadata": {},
   "source": [
    "- What are the cities in California with the most investment firms?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8343409c-8e93-4463-9499-94d0c27aa73e",
   "metadata": {
    "height": 148
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH p=(:Manager)-[:LOCATED_AT]->(address:Address)\n",
    "         WHERE address.state = 'California'\n",
    "  RETURN address.city as city, count(address.city) as numManagers\n",
    "    ORDER BY numManagers DESC\n",
    "    LIMIT 10\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9cbd5777-7663-4941-98d2-8649e4e4d410",
   "metadata": {},
   "source": [
    "- Which city in California has the most companies listed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a978b52-a0e2-4ace-8002-b4e92fc6f7a5",
   "metadata": {
    "height": 131
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH p=(:Company)-[:LOCATED_AT]->(address:Address)\n",
    "         WHERE address.state = 'California'\n",
    "  RETURN address.city as city, count(address.city) as numCompanies\n",
    "    ORDER BY numCompanies DESC\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e377e9ea-824e-4a98-a6ae-fdcd8f8f740d",
   "metadata": {},
   "source": [
    "- What are the top investment firms in San Francisco?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf979e5f-2e46-46f2-a0a0-4cfb011b3640",
   "metadata": {
    "height": 165
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH p=(mgr:Manager)-[:LOCATED_AT]->(address:Address),\n",
    "         (mgr)-[owns:OWNS_STOCK_IN]->(:Company)\n",
    "         WHERE address.city = \"San Francisco\"\n",
    "  RETURN mgr.managerName, sum(owns.value) as totalInvestmentValue\n",
    "    ORDER BY totalInvestmentValue DESC\n",
    "    LIMIT 10\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d723b3a2-25db-497c-ac23-b4ffb0afc657",
   "metadata": {},
   "source": [
    "- What companies are located in Santa Clara?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b32ef15-05c6-43fb-a34a-536dfdfd8c07",
   "metadata": {
    "height": 97
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH (com:Company)-[:LOCATED_AT]->(address:Address)\n",
    "         WHERE address.city = \"Santa Clara\"\n",
    "  RETURN com.companyName\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab8a1379-d7c3-46a7-b730-13e1cc913027",
   "metadata": {},
   "source": [
    "- What companies are near Santa Clara?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61d92aea-e612-411c-8935-4087ce4a7247",
   "metadata": {
    "height": 131
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH (sc:Address)\n",
    "    WHERE sc.city = \"Santa Clara\"\n",
    "  MATCH (com:Company)-[:LOCATED_AT]->(comAddr:Address)\n",
    "    WHERE point.distance(sc.location, comAddr.location) < 10000\n",
    "  RETURN com.companyName, com.companyAddress\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6222a9da-43de-4e95-882a-9de81c06c604",
   "metadata": {},
   "source": [
    "- What investment firms are near Santa Clara?\n",
    "- Try updating the distance in the query to expand the search radius"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39f9dcb2-710c-429e-bdc6-5090a2ccb464",
   "metadata": {
    "height": 148
   },
   "outputs": [],
   "source": [
    "kg.query(\"\"\"\n",
    "  MATCH (address:Address)\n",
    "    WHERE address.city = \"Santa Clara\"\n",
    "  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)\n",
    "    WHERE point.distance(address.location, \n",
    "        managerAddress.location) < 10000\n",
    "  RETURN mgr.managerName, mgr.managerAddress\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae96ce5f-81fd-4465-9ad1-309154de3fe2",
   "metadata": {},
   "source": [
    "- Which investment firms are near Palo Alto Networks?\n",
    "- Note that full-text search is able to handle typos!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f82d4ea-c8fc-4508-969a-163480469f12",
   "metadata": {
    "height": 301
   },
   "outputs": [],
   "source": [
    "# Which investment firms are near Palo Alto Networks?\n",
    "# (The search term below is deliberately misspelled as \"Palo Aalto\".)\n",
    "kg.query(\"\"\"\n",
    "  CALL db.index.fulltext.queryNodes(\n",
    "         \"fullTextCompanyNames\", \n",
    "         \"Palo Aalto Networks\"\n",
    "         ) YIELD node, score\n",
    "  WITH node as com\n",
    "  MATCH (com)-[:LOCATED_AT]->(comAddress:Address),\n",
    "    (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)\n",
    "    WHERE point.distance(comAddress.location, \n",
    "        mgrAddress.location) < 10000\n",
    "  RETURN mgr, \n",
    "    toInteger(point.distance(comAddress.location, \n",
    "        mgrAddress.location) / 1000) as distanceKm\n",
    "    ORDER BY distanceKm ASC\n",
    "    LIMIT 10\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "28758b90-bdc8-4fb0-a18c-fede97d02bb6",
   "metadata": {},
   "source": [
    "- Try pausing the video and modifying queries above to further explore the graph\n",
    "- You can learn more about Cypher at the Neo4j website: https://neo4j.com/product/cypher-graph-query-language/"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "24b6e2de-90e4-4341-b68f-c4ccc9d9d8a2",
   "metadata": {},
   "source": [
    "### Writing Cypher with an LLM"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da712612-b789-4c6f-a473-bc7f9b5d32fe",
   "metadata": {},
   "source": [
    "In this section, you'll use few-shot learning to teach an LLM to write Cypher\n",
    "- You'll use OpenAI's GPT-3.5 model\n",
    "- You'll also use a new Neo4j integration within LangChain called **GraphCypherQAChain**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bc2a60a-0da2-473c-ae3c-1684689da2fa",
   "metadata": {
    "height": 386
   },
   "outputs": [],
   "source": [
    "CYPHER_GENERATION_TEMPLATE = \"\"\"Task:Generate Cypher statement to \n",
    "query a graph database.\n",
    "Instructions:\n",
    "Use only the provided relationship types and properties in the \n",
    "schema. Do not use any other relationship types or properties that \n",
    "are not provided.\n",
    "Schema:\n",
    "{schema}\n",
    "Note: Do not include any explanations or apologies in your responses.\n",
    "Do not respond to any questions that might ask anything else than \n",
    "for you to construct a Cypher statement.\n",
    "Do not include any text except the generated Cypher statement.\n",
    "Examples: Here are a few examples of generated Cypher \n",
    "statements for particular questions:\n",
    "\n",
    "# What investment firms are in San Francisco?\n",
    "MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)\n",
    "    WHERE mgrAddress.city = 'San Francisco'\n",
    "RETURN mgr.managerName\n",
    "The question is:\n",
    "{question}\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "acb00bd4-a983-4a27-a176-4fa71c60c359",
   "metadata": {
    "height": 80
   },
   "outputs": [],
   "source": [
    "CYPHER_GENERATION_PROMPT = PromptTemplate(\n",
    "    input_variables=[\"schema\", \"question\"], \n",
    "    template=CYPHER_GENERATION_TEMPLATE\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8988b57-5cdd-4e33-8b95-9a2579273723",
   "metadata": {
    "height": 114
   },
   "outputs": [],
   "source": [
    "cypherChain = GraphCypherQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0),\n",
    "    graph=kg,\n",
    "    verbose=True,\n",
    "    cypher_prompt=CYPHER_GENERATION_PROMPT,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fca1d1b0-e192-43a9-9a00-4d81f388df7c",
   "metadata": {
    "height": 63
   },
   "outputs": [],
   "source": [
    "def prettyCypherChain(question: str) -> None:\n",
    "    \"\"\"Ask the Cypher QA chain a question and print the wrapped answer.\n",
    "\n",
    "    Runs `question` through the global `cypherChain` and prints the\n",
    "    response wrapped to 60 columns. Nothing is returned, so calling this\n",
    "    as the last expression of a cell does not echo the answer a second time.\n",
    "\n",
    "    `cypherChain` is looked up at call time, so re-creating the chain in a\n",
    "    later cell changes which prompt this helper uses.\n",
    "\n",
    "    Args:\n",
    "        question: Natural-language question to pass to the chain.\n",
    "    \"\"\"\n",
    "    response = cypherChain.run(question)\n",
    "    print(textwrap.fill(response, 60))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "849556db-797f-4227-ab8d-d95f81c60004",
   "metadata": {
    "height": 46
   },
   "outputs": [],
   "source": [
    "prettyCypherChain(\"What investment firms are in San Francisco?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37118b33-2166-49f9-8683-e04f395f13f4",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "prettyCypherChain(\"What investment firms are in Menlo Park?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "017af6f6-0025-477e-8d99-ec9fa5add56d",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "prettyCypherChain(\"What companies are in Santa Clara?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53e20b99-8cb6-44ce-b801-a46a7ce0c751",
   "metadata": {
    "height": 46
   },
   "outputs": [],
   "source": [
    "prettyCypherChain(\"What investment firms are near Santa Clara?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a957783-d88a-446b-a63f-ac8d7cc2e8a6",
   "metadata": {},
   "source": [
    "### Expand the prompt to teach the LLM new Cypher patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b4b75a5-814f-4734-95e6-20f6afa083b7",
   "metadata": {
    "height": 471
   },
   "outputs": [],
   "source": [
    "CYPHER_GENERATION_TEMPLATE = \"\"\"Task:Generate Cypher statement to query a graph database.\n",
    "Instructions:\n",
    "Use only the provided relationship types and properties in the schema.\n",
    "Do not use any other relationship types or properties that are not provided.\n",
    "Schema:\n",
    "{schema}\n",
    "Note: Do not include any explanations or apologies in your responses.\n",
    "Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.\n",
    "Do not include any text except the generated Cypher statement.\n",
    "Examples: Here are a few examples of generated Cypher statements for particular questions:\n",
    "\n",
    "# What investment firms are in San Francisco?\n",
    "MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)\n",
    "    WHERE mgrAddress.city = 'San Francisco'\n",
    "RETURN mgr.managerName\n",
    "\n",
    "# What investment firms are near Santa Clara?\n",
    "  MATCH (address:Address)\n",
    "    WHERE address.city = \"Santa Clara\"\n",
    "  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)\n",
    "    WHERE point.distance(address.location, \n",
    "        managerAddress.location) < 10000\n",
    "  RETURN mgr.managerName, mgr.managerAddress\n",
    "\n",
    "The question is:\n",
    "{question}\"\"\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c97f80a7-62c8-4e52-84d2-1ef45e66dd05",
   "metadata": {},
   "source": [
    "- Update Cypher generation prompt with new template, and re-initialize the Cypher chain to use the new prompt\n",
    "- Rerun this code anytime you make a change to the Cypher generation template!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c989e525-bff0-4b02-b5a4-4d00f178d83f",
   "metadata": {
    "height": 199
   },
   "outputs": [],
   "source": [
    "CYPHER_GENERATION_PROMPT = PromptTemplate(\n",
    "    input_variables=[\"schema\", \"question\"], \n",
    "    template=CYPHER_GENERATION_TEMPLATE\n",
    ")\n",
    "\n",
    "cypherChain = GraphCypherQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0),\n",
    "    graph=kg,\n",
    "    verbose=True,\n",
    "    cypher_prompt=CYPHER_GENERATION_PROMPT,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df0bc4de-ae68-459b-b8ef-a4d1e4684e6b",
   "metadata": {
    "height": 46
   },
   "outputs": [],
   "source": [
    "prettyCypherChain(\"What investment firms are near Santa Clara?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3001e746-1920-43fe-a205-6bc00974d3b8",
   "metadata": {},
   "source": [
    "### Expand the query to retrieve information from the Form 10K chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e1fd6ce-a947-4ca4-98e1-9eff85f54140",
   "metadata": {
    "height": 658
   },
   "outputs": [],
   "source": [
    "CYPHER_GENERATION_TEMPLATE = \"\"\"Task:Generate Cypher statement to query a graph database.\n",
    "Instructions:\n",
    "Use only the provided relationship types and properties in the schema.\n",
    "Do not use any other relationship types or properties that are not provided.\n",
    "Schema:\n",
    "{schema}\n",
    "Note: Do not include any explanations or apologies in your responses.\n",
    "Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.\n",
    "Do not include any text except the generated Cypher statement.\n",
    "Examples: Here are a few examples of generated Cypher statements for particular questions:\n",
    "\n",
    "# What investment firms are in San Francisco?\n",
    "MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)\n",
    "    WHERE mgrAddress.city = 'San Francisco'\n",
    "RETURN mgr.managerName\n",
    "\n",
    "# What investment firms are near Santa Clara?\n",
    "  MATCH (address:Address)\n",
    "    WHERE address.city = \"Santa Clara\"\n",
    "  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)\n",
    "    WHERE point.distance(address.location, \n",
    "        managerAddress.location) < 10000\n",
    "  RETURN mgr.managerName, mgr.managerAddress\n",
    "\n",
    "# What does Palo Alto Networks do?\n",
    "  CALL db.index.fulltext.queryNodes(\n",
    "         \"fullTextCompanyNames\", \n",
    "         \"Palo Alto Networks\"\n",
    "         ) YIELD node, score\n",
    "  WITH node as com\n",
    "  MATCH (com)-[:FILED]->(f:Form),\n",
    "    (f)-[s:SECTION]->(c:Chunk)\n",
    "  WHERE s.f10kItem = \"item1\"\n",
    "RETURN c.text\n",
    "\n",
    "The question is:\n",
    "{question}\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "940ae18c-e29d-4970-b3e9-641e0bbcb9eb",
   "metadata": {
    "height": 216
   },
   "outputs": [],
   "source": [
    "CYPHER_GENERATION_PROMPT = PromptTemplate(\n",
    "    input_variables=[\"schema\", \"question\"], \n",
    "    template=CYPHER_GENERATION_TEMPLATE\n",
    ")\n",
    "\n",
    "cypherChain = GraphCypherQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0),\n",
    "    graph=kg,\n",
    "    verbose=True,\n",
    "    cypher_prompt=CYPHER_GENERATION_PROMPT,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dcd29b47-f7d3-4b8a-8d6b-b3cdf31ad9fd",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "prettyCypherChain(\"What does Palo Alto Networks do?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8efee0c7-731a-4655-b425-ae6d3cdd838e",
   "metadata": {},
   "source": [
    "### Try for yourself!\n",
    "\n",
    "- Update the Cypher generation prompt below to ask different questions of the graph\n",
    "- You can run the \"check schema\" cell to be reminded of the graph structure\n",
    "- Use any of the examples in this notebook, or in previous lessons, to get started\n",
    "- Remember to update the prompt template and re-initialize the GraphCypherQAChain each time you make updates!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be1d5c18-a480-471e-bd3c-81cc0b6e5f1f",
   "metadata": {
    "height": 63
   },
   "outputs": [],
   "source": [
    "# Check the graph schema\n",
    "kg.refresh_schema()\n",
    "print(textwrap.fill(kg.schema, 60))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "745f3ec8-fabf-4f66-92d1-73632ab77bef",
   "metadata": {
    "height": 658
   },
   "outputs": [],
   "source": [
    "CYPHER_GENERATION_TEMPLATE = \"\"\"Task:Generate Cypher statement to query a graph database.\n",
    "Instructions:\n",
    "Use only the provided relationship types and properties in the schema.\n",
    "Do not use any other relationship types or properties that are not provided.\n",
    "Schema:\n",
    "{schema}\n",
    "Note: Do not include any explanations or apologies in your responses.\n",
    "Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.\n",
    "Do not include any text except the generated Cypher statement.\n",
    "Examples: Here are a few examples of generated Cypher statements for particular questions:\n",
    "\n",
    "# What investment firms are in San Francisco?\n",
    "MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)\n",
    "    WHERE mgrAddress.city = 'San Francisco'\n",
    "RETURN mgr.managerName\n",
    "\n",
    "# What investment firms are near Santa Clara?\n",
    "  MATCH (address:Address)\n",
    "    WHERE address.city = \"Santa Clara\"\n",
    "  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)\n",
    "    WHERE point.distance(address.location, \n",
    "        managerAddress.location) < 10000\n",
    "  RETURN mgr.managerName, mgr.managerAddress\n",
    "\n",
    "# What does Palo Alto Networks do?\n",
    "  CALL db.index.fulltext.queryNodes(\n",
    "         \"fullTextCompanyNames\", \n",
    "         \"Palo Alto Networks\"\n",
    "         ) YIELD node, score\n",
    "  WITH node as com\n",
    "  MATCH (com)-[:FILED]->(f:Form),\n",
    "    (f)-[s:SECTION]->(c:Chunk)\n",
    "  WHERE s.f10kItem = \"item1\"\n",
    "RETURN c.text\n",
    "\n",
    "The question is:\n",
    "{question}\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f3cff2ef-cd92-47f5-a2d3-40f13288cdcf",
   "metadata": {
    "height": 216
   },
   "outputs": [],
   "source": [
    "# Update the prompt and reset the QA chain\n",
    "CYPHER_GENERATION_PROMPT = PromptTemplate(\n",
    "    input_variables=[\"schema\", \"question\"], \n",
    "    template=CYPHER_GENERATION_TEMPLATE\n",
    ")\n",
    "\n",
    "cypherChain = GraphCypherQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0),\n",
    "    graph=kg,\n",
    "    verbose=True,\n",
    "    cypher_prompt=CYPHER_GENERATION_PROMPT,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "367efdd3-ce87-45a5-a1db-e9016f80760e",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "prettyCypherChain(\"<<REPLACE WITH YOUR QUESTION>>\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
